open Sets

(** Provides transformations for deterministic finite automata (DFAs) *)

type regexp =
| Char of char
| Star of regexp
| Concat of regexp list
| Union of regexp list;;

type 'a nfa = 'a * 'a list * ('a * 'a) list * ('a * int * 'a) list;;

type 'a dfa = 'a nfa;;
(*
	let example_dfa : int dfa =
	  0,[2],[],
	  [0,'a',1; 1,'a',0; 2,'a',3; 3,'a',2; 0,'b',2; 2,'b',0; 1,'b',3; 3,'b',1];;
*) 


(** Converts a DFA into a string *)
let toString (nQ, nSig, delta, q0, finals) =
	let string_of_delta (map: int Int2Map.t) =
		Int2Map.fold
			(fun (p, a) q str ->
						"\t("^(string_of_int p)^", "^(string_of_int a)^
						") -> "^(string_of_int q)^"\n"^str)
			map
			""
	in
	"states: 1.." ^ (string_of_int nQ) ^ "\n" ^
	"letters: 1.." ^ (string_of_int nSig) ^ "\n" ^
	"initial state: 1 \n" ^
	"transitions:\n" ^ (string_of_delta delta) ^ "\n" ^
	"final set: " ^ (Std.dump (IntSet.elements finals)) ^ "\n";;


(** Converts an NBA type into an DFA type. Fails if NBA is not deterministic. *)
let from_nba (nQ,nS,del,ini,fin) =
	let del' =
		Int2Map.fold
			(fun k d map ->
				if (IntSet.cardinal d)>1 then failwith "Dfa: from_nba";
				Int2Map.add k (List.hd (IntSet.elements d)) map)
			del
			Int2Map.empty
	in
	let ini' =
		if (IntSet.cardinal ini)>1 then failwith "Dfa: from_nba";
		List.hd (IntSet.elements ini)
	in
	(nQ,nS,del',ini',fin);;


(** Renumbers the states such that the automaton uses all states from 1 to nQ' *)
let renumber ((_:int), (nS:int), (del: int Int2Map.t), (ini:int), (fin: IntSet.t)) =
	let usedStates = Int2Map.fold (fun (p,a) q set -> IntSet.add q (IntSet.add p set)) del IntSet.empty in
	let max = IntSet.max_elt usedStates in
	let renAr = Array.make (max + 1) 0 in  (* renaming map (disregardin cell 0) *)
	let i = ref 1 in
	IntSet.iter (fun q -> renAr.(q) <- !i; i := !i + 1) usedStates;
	(*for i=1 to max do
		print_endline ((Std.dump i)^" --> "^(Std.dump renAr.(i)));
	done;*)
	let nQ' = IntSet.cardinal usedStates in 
	let fin' = IntSet.fold (fun q set -> IntSet.add renAr.(q) set) fin IntSet.empty in
	let del' = Int2Map.fold 
		(fun (p,a) q map -> Int2Map.add (renAr.(p),a) renAr.(q) map) 
		del 
		Int2Map.empty in
	(nQ', nS, del', renAr.(ini), fin');;
	
		


(** Converts a DFA into a minimal DFA. It implements the standard O(n^2) algorithm.  *)
let minimize dfa = 
	(*--- print_endline (toString dfa); *)  
	let ((nQ:int), (nS:int), (del:int Int2Map.t), (ini:int), (fin: IntSet.t)) = dfa in
	let marked = Array.make_matrix (nQ + 1) (nQ + 1) false in  (* array starts at 0, but we skip 0-coordinates *)
	(* initial markings (q,q') where one state is in F and the other not *)
	for x = 1 to nQ do
		for y = x+1 to nQ do
			let notequiv = (not(IntSet.mem x fin) && (IntSet.mem y fin)) || ((IntSet.mem x fin) && not(IntSet.mem y fin)) in 
			if notequiv then begin
				marked.(x).(y) <- true;
				marked.(y).(x) <- true; (* for symetry *)
			end;
		done
	done;

	(* propagate markings *)
	let changeflag = ref true in
	while !changeflag do
		changeflag := false;
		(* process only pairs (q,q'), where q<q' *)
		for x = 1 to nQ do
			for y = x+1 to nQ do
				if not(marked.(x).(y)) then begin
					(* if for some letter they go into non-equivalent states *)
					for a = 1 to nS do
						let x' = Int2Map.find (x, a) del in
						let y' = Int2Map.find (y, a) del in
						let mi = min x' y' in
						let ma = max x' y' in
						if marked.(mi).(ma) then begin
							marked.(x).(y) <- true;
							marked.(y).(x) <- true; (* for symetry *)
							changeflag := true
						end
					done
				end
			done
		done
	done;
	(* print marking table *)
(*	for x=1 to nQ do
		for y =1 to nQ do
			if marked.(x).(y) then print_string "x " else print_string "_ ";
		done;
		print_endline "";
	done;*)
	
	(* let q2r = Array.make (nQ+1) 0 in state --> class representant    *)
	(* find for each state the least representant of the same equiv. class:*)
	(* - look in line whether there is an unmarked cell *)
	(* - take the unmarked cell with least x coordinate *)
	(* - recursively look for least representant of state x *)
	let rec least_repr (q:int) = 
		let rec least_in_line (p:int) (p':int) = 
			if p=p' then p' else 
				if marked.(p).(p') then least_in_line (p+1) p' else
					p in
		let x = least_in_line 1 q in
		if x = q then q else least_repr x
	in  
	let reprAr = Array.make (nQ + 1) 0 in  (* representations of equivalence classes *)
	for q=1 to nQ do  (* for speed-up save mapping 'state->class_repr' as array *)
		reprAr.(q) <- least_repr q;
	done;		  
	
	let fin' = IntSet.fold (fun q set -> IntSet.add reprAr.(q) set) fin IntSet.empty in
	let del' = Int2Map.fold 
		(fun (p,a) q map -> Int2Map.add (reprAr.(p),a) reprAr.(q) map) 
		del 
		Int2Map.empty in
	renumber (123, nS, del', reprAr.(ini), fin');;


(*



(* removes all other l's from the list *)
let rec remove l x =
  match l with
  | [] -> []
  | h::t ->
    let t' = remove t x in
    if x = h then t' else h::t';;

(* checks whether l1 \cup l2 != {} *)
let rec touches l1 l2 =
  match l1 with
  | [] -> false
  | h::t -> if mem h l2 then true else touches t l2;;

(* removes duplicates from a list *)
let rec set l =
  match l with
  | [] -> []
  | h::t -> h::(remove (set t) h);;

let minus =
  fold_left remove;;

(* extracts states: returns set of states *)
let states : 'a nfa -> 'a list =
  fun (q0,f,d',d) ->
    set ([q0]@  (* initial *)
	 		f@  (* finals *)
			(map fst d')@(map snd d')@  (* states of epsilon transitions *)
      	(map (fun (x,y,z) -> x) d)@(map (fun (x,y,z) -> z) d));;  (* states of transitions *)

(* extracts the alphabet: returns set of letters *)
let alphabet : 'a nfa -> int list =
  fun (q0,f,d',d) ->
    set (map (fun (x,y,z) -> y) d);;

(* returns position of element x in the list l *)
let rec index l x =
  match l with
  | [] -> failwith "index"
  | h::t -> if h = x then 0 else 1 + index t x;;

(* returns an automaton, where states are numbered by integers *)
let renumber : 'a nfa -> int nfa =
  fun ((q0,f,d',d) as m) ->
    let r = index (states m) in  (* each state gets a unique index. returns r:state->index *)
    (r q0),(map r f),(map (fun (x,y) -> (r x),(r y)) d'), (map (fun (x,y,z) -> (r x),y,(r z)) d);;

(* constructs the product automaton *)
let union_nfa : 'a nfa -> 'a nfa -> int nfa =
  fun (q1,f1,d1',d1) (q2,f2,d2',d2) ->
    let h1 x = 1,x in
    let h2 x = 2,x in
    let m =
	 	(0,q1), (* new initial state *)
	 	(set ((map h1 f1)@(map h2 f2))),  (* new final states *)
      (set ([(0,q1),(1,q1); (0,q1),(2,q2)]@  (* eps-transition from initial new initial state *)
	     (map (fun (x,y) -> (h1 x),(h1 y)) d1')@
        (map (fun (x,y) -> (h2 x),(h2 y)) d2'))),
      (set ((map (fun (x,y,z) -> (h1 x),y,(h1 z)) d1)@
        (map (fun (x,y,z) -> (h2 x),y,(h2 z)) d2))) in
    renumber m;;

let star_nfa : 'a nfa -> int nfa =
  fun (q,f,d',d) ->
    let h1 x = 1,x in
    let m = (0,q),(set ((0,q)::(map h1 f))),
      (set ([(0,q),(1,q)]@
        (map (fun (x,y) -> (h1 x),(h1 y)) d')))@
        (map (fun x -> (h1 x),(0,q)) f),
      (set (map (fun (x,y,z) -> (h1 x),y,(h1 z)) d)) in
    renumber m;;

let concat_nfa : 'a nfa -> 'a nfa -> int nfa =
  fun (q1,f1,d1',d1) (q2,f2,d2',d2) ->
    let h1 x = 1,x in
    let h2 x = 2,x in
    let m = (1,q1),(set (map h2 f2)),
      (set (map (fun (x,y) -> (h1 x),(h1 y)) d1')@
        (map (fun x -> (h1 x),(2,q2)) f1)@
        (map (fun (x,y) -> (h2 x),(h2 y)) d2')),
      (set ((map (fun (x,y,z) -> (h1 x),y,(h1 z)) d1)@
        (map (fun (x,y,z) -> (h2 x),y,(h2 z)) d2))) in
    renumber m;;

(* subset construction *)
let dfa_of_nfa : 'a nfa -> int dfa =
  fun ((q0,f,d',d) as m) ->
    let s = alphabet m in
	(* computes the transitive closure, given a list and a successor function.
		recursive call is needed because of eps-transitions! *)
    let rec close l f =
      let l' = set (l@(flatten (map f l))) in
      if l = l' then l else close l' f in
	 (* returns a list of successors wrt. the eps-transition *)
    let lclose l = close l
      (fun x -> map snd (filter (fun (x',y) -> x = x') d')) in
	 (* returns a set of all successors of a state q1 and the letter c *)
    let dd q1 c =
	 	lclose (
	 		map
			 	(fun (x,y,z) -> z)
	      	(filter (fun (x,y,z) -> y = c && mem x q1) d)
		) in
    let q0' = lclose [q0] in
    let q = close [q0']
      (fun q1 -> map (dd q1) s) in
    let m =
	 	q0',
		(filter (touches f) q),
		[],  (* no eps-transitions *)
      (flatten (
			map
				(fun q1 -> map (fun c -> q1,c,(dd q1 c)) s)
				q)
		)
	 in
    renumber m;;

let step : 'a dfa -> 'a -> int -> 'a =
  fun (q0,f,d',d) q c ->
    let rec step d =
      match d with
      | [] -> failwith "step"
      | (x,y,z)::t -> if x = q && y = c then z else step t in
    step d;;

(* minimize a DFA *)
let minimize : 'a dfa -> int dfa =
  fun ((q0,f,d',d) as m) ->
    let s = alphabet m in
    let q = states m in
    let z0 = [f; minus q f] in
    let rec lookup z q1 =
      match z with
      | [] -> failwith "minimize"
      | h::t -> if mem q1 h then h else lookup t q1 in
    let equiv z q1 q2 =
      hd (lookup z q1) = hd (lookup z q2) in
    let equiv1 z q1 q2 =
      fold_left (fun b c -> b && equiv z (step m q1 c) (step m q2 c)) true s in
    let rec partition z l =
      match l with
      | [] -> []
      | q1::t ->
        let rec split l =
         (match l with
          | [] -> [],[]
          | h::t ->
            let l1,l2 = split t in
            if equiv1 z h q1 then h::l1,l2 else l1,h::l2) in
        let l1,l2 = split t in
        (q1::l1)::(partition z l2) in
    let rec fix z =
      let z' = flatten (map (partition z) z) in
      if length z' = length z then z else fix z' in
    let z = fix z0 in
    let l = lookup z in
    let m' = (l q0),(set (map l f)),[],
      (set (map (fun (x,y,z) -> (l x),y,(l z)) d)) in
    renumber m';;

let complement : 'a nfa -> int dfa =
  fun m ->
    let ((q0,f,d',d) as m') = minimize (dfa_of_nfa m) in
    renumber (q0,(minus (states m') f),[],d);;

let diff_dfa : 'a nfa -> 'a nfa -> int dfa =
  fun m1 m2 ->
    complement (union_nfa (complement m1) m2);;

let rec concat : regexp list -> regexp =
  fun l ->
    let concat' r1 r2 =
      match r1,r2 with
      | Union[],_ -> Union[]
      | _,Union[] -> Union[]
      | Concat(l1),Concat(l2) -> Concat (l1@l2)
      | Concat(l1),_ -> Concat (l1@[r2])
      | _,Concat(l2) -> Concat (r1::l2)
      | _,_ -> Concat [r1; r2] in
    match l with
    | [] -> Concat []
    | [h] -> h
    | h::t -> concat' h (concat t);;

let rec union : regexp list -> regexp =
  fun l ->
    let union'' l =
      match set l with
      | [h] -> h
      | _ -> Union l in
    let union' r1 r2 =
      match r1,r2 with
      | Union(l1),Union(l2) -> union'' (l1@l2)
      | Union(l1),_ -> union'' (l1@[r2])
      | _,Union(l2) -> union'' (r1::l2)
      | _,_ -> union'' [r1; r2] in
    match l with
    | [] -> Union []
    | [h] -> h
    | h::t -> union' h (union t);;

let star : regexp -> regexp =
  fun r ->
    match r with
    | Star(_) -> r
    | Concat[] -> r
    | Union[] -> Concat[]
    | _ -> Star(r);;

(*
	let regexp_of_nfa : 'a nfa -> regexp =
	  fun (q0,f,d',d) ->
	    let dd = (map (fun (x,y) -> x,(Concat[]),y) d')@
	      (map (fun (x,y,z) -> x,(Char y),z) d) in
	    let rec merge q1 dd =
	      match dd with
	      | [] -> []
	      | (x,y,z)::t ->
	        if x = q1 or z = q1 then merge q1 t else
	        let y'' = union (y::(flatten (map
	          (fun (x',y',z') -> if x = x' && z = z' then [y'] else []) t))) in
	        let t' = filter (fun (x',y',z') -> not (x = x' && z = z')) t in
	        (x,y'',z)::(merge q1 t') in
	    let rec thin dd f0 =
	      let q = set ((map (fun (x,y,z) -> x) dd)@(map (fun (x,y,z) -> z) dd)) in
	      let rec scan q' =
	        let lookup q1 q2 =
	          flatten (map
	            (fun (x,y,z) -> if x = q1 && z = q2 then [y] else []) dd) in
	        match q' with
	        | [] ->
	          let w1 = union (lookup q0 q0) in
	          if q0 = f0 then star w1 else
	          let w2 = union (lookup q0 f0) in
	          let w3 = union (lookup f0 f0) in
	          let w4 = union (lookup f0 q0) in
	            concat [star w1; w2; star (union [w3; concat [w4; star w1; w2]])]
	        | q1::t ->
	          if q1 = q0 or q1 = f0 then scan t else
	          let q1q1 = lookup q1 q1 in
	          thin (merge q1
	           ((filter (fun (x,y,z) -> not (x = q1 or z = q1)) dd)@
	            (flatten (map (fun (x1,y1,z1) ->
	              flatten (map (fun (x2,y2,z2) ->
	                if x1 = q1 or z2 = q1 or not (z1 = q1 && x2 = q1) then [] else
	                [x1,(if q1q1 = [] then concat [y1; y2] else
	                  concat [y1; star (union q1q1); y2]),z2]) dd)) dd)))) f0 in
	      scan q in
	    union (map (thin dd) f);;

	let string_of_regexp =
	  let rec string_of_regexp i r =
	    let b i' s = if i' <= i then "("^s^")" else s in
	    match r with
	    | Char c -> String.make 1 c
	    | Star r' -> b 7 ((string_of_regexp 6 r')^"*")
	    | Concat [] -> "^"
	    | Concat [r] -> string_of_regexp i r
	    | Concat (h::t) -> b 4 ((string_of_regexp 3 h)^
	                            (string_of_regexp 3 (Concat t)))
	    | Union [] -> "0"
	    | Union [r] -> string_of_regexp i r
	    | Union (h::t) -> b 2 ((string_of_regexp 1 h)^" U "^
	                           (string_of_regexp 1 (Union t))) in
	  string_of_regexp 0;;

	let print_regexp f r =
	  Format.pp_print_string f ("regexp \""^(string_of_regexp r)^"\"");;

(*
	#install_printer print_regexp;;

*)
	(*

	  (aa U ab(bb)*ba)*(b U ab(bb)*a)(a(bb)*a U (b U a(bb)*ba)(aa U ab(bb)*ba)*(b U ab(bb)*a))*

	  (a(bb)*a)*(b U ab(bb)*a)(a(bb)*a U (b U ab(bb)*a)(a(bb)*a)*(b U ab(bb)*a))*

	  a((ab U ba)(aa U bb)*(ab U ba) U aa U bb)*(ab U ba)((ab U ba)(aa U bb)*(ab U ba) U aa U bb)* U b((ab U ba)(aa U bb)*(ab U ba) U aa U bb)*

	  (a(aa U bb)*(ab U ba) U b)((ab U ba)(aa U bb)*(ab U ba) U aa U bb)*

	*)
	
	let rec dfa_of_regexp : regexp -> int dfa =
	  fun r ->
	    match r with
	    | Char c -> 0,[1],[],[0,c,1]
	    | Star r' -> minimize (dfa_of_nfa (star_nfa (dfa_of_regexp r')))
	    | Concat [] -> 0,[0],[],[]
	    | Concat [h] -> dfa_of_regexp h
	    | Concat (h::t) -> minimize (dfa_of_nfa (concat_nfa (dfa_of_regexp h)
	        (dfa_of_regexp (Concat t))))
	    | Union [] -> 0,[],[],[]
	    | Union [h] -> dfa_of_regexp h
	    | Union (h::t) -> minimize (dfa_of_nfa (union_nfa (dfa_of_regexp h)
	        (dfa_of_regexp (Union t))));;
	
	let explode s =
	  let rec explode1 n =
	    try let c = s.[n] in c::(explode1 (n + 1))
	    with Invalid_argument _ -> [] in
	  explode1 0;;
	
	let implode l = fold_right (^) (map (String.make 1) l) "";;
	
	let lex s =
	  filter (fun c -> not (mem c [' '; '\t'; '\n'; '^'])) (explode s);;
	
	let regexp s =
	  let err l =
	    if l = [] then failwith "regexp: string too short" else
	      failwith ("regexp: parse failed before: "^(implode l)) in
	  let rec parse1 uu rr l =
	    match l with
	    | [] -> uu,rr,[]
	    | ')'::_ -> uu,rr,l
	    | '('::t ->
	      let uu',rr',l' = parse1 [] [] t in
	       (match l' with
	        | ')'::t' -> parse1 uu (rr@[union (uu'@[concat rr'])]) t'
	        | _ -> err l')
	    | '*'::t ->
	      let rec st rr =
	        match rr with
	        | [] -> err l
	        | [h] -> [star h]
	        | h::t -> h::(st t) in
	      parse1 uu (st rr) t
	    | 'U'::t -> parse1 (uu@[concat rr]) [] t
	    | '|'::t -> parse1 (uu@[concat rr]) [] t
	    | '0'::t -> parse1 uu (rr@[Union[]]) t
	    | h::t -> parse1 uu (rr@[Char h]) t in
	  let uu,rr,l' = parse1 [] [] (lex s) in
	  if l' = [] then union (uu@[concat rr]) else err l';;
	
	let sample r =
	  let rec sample r =
	    match r with
	    | Char c -> [c]
	    | Star _ -> []
	    | Concat l -> fold_left (@) [] (map sample l)
	    | Union l ->
	      let rec f l =
	        match l with
	        | [] -> failwith "sample"
	        | h::t ->
	          try sample h with Failure "sample" -> f t in
	      f l in
	  implode (sample r);;
	
	let sample_diff : 'a nfa -> 'a nfa -> string =
	  fun m1 m2 ->
	    sample (regexp_of_nfa (diff_dfa m1 m2));;
	
	let dfa : string -> int dfa =
	  fun s ->
	    dfa_of_regexp (regexp s);;
	
	let compare_regexp : string -> string -> string option * string option =
	  fun s1 s2 ->
	    let m1 = dfa s1 in
	    let m2 = dfa s2 in
	    let f m1 m2 =
	      try Some (sample_diff m1 m2)
	      with Failure "sample" -> None in
	    (f m1 m2),(f m2 m1);;
	
	let eq_regexp : string -> string -> bool =
	  fun s1 s2 ->
	    compare_regexp s1 s2 = (None,None);;

*)
*)